"""
Phase 1: Data Assembly — Trilemma Panel
=======================================
Loads Aizenman-Chinn-Ito trilemma indices (already downloaded) and
Ilzetzki-Reinhart-Rogoff exchange rate regime classifications.
Merges with full_panel.csv. Constructs derived variables and summary statistics.
"""

import io
from pathlib import Path

import numpy as np
import pandas as pd

# ── Paths ──────────────────────────────────────────────────────────────
PROJECT_DIR = Path("/mnt/c/demographics_capital_flows/trilemma")
MULTILATERAL_DIR = PROJECT_DIR.parent / "multilateral"
PROCESSED_DIR = PROJECT_DIR / "data" / "processed"
RAW_DIR = PROJECT_DIR / "data" / "raw"
TABLES_DIR = PROJECT_DIR / "output" / "tables"

for d in [PROCESSED_DIR, RAW_DIR, TABLES_DIR]:
    d.mkdir(parents=True, exist_ok=True)

FULL_PANEL = MULTILATERAL_DIR / "followup" / "data" / "processed" / "full_panel.csv"

# ── IMF IFS country code → ISO3 mapping ──────────────────────────────
# Source: IMF IFS code list (numeric codes used in trilemma dataset)
IMF_TO_ISO3 = {
    112: "GBR", 122: "AUT", 124: "BEL", 128: "DNK", 132: "FRA",
    134: "DEU", 136: "ITA", 137: "LUX", 138: "NLD", 142: "NOR",
    144: "SWE", 146: "CHE", 156: "CAN", 158: "JPN", 172: "FIN",
    174: "GRC", 176: "ISL", 178: "IRL", 182: "PRT", 184: "ESP",
    186: "TUR", 193: "AUS", 196: "NZL", 199: "ZAF", 111: "USA",
    213: "ARG", 218: "BOL", 223: "BRA", 228: "CHL", 233: "COL",
    238: "CRI", 243: "DOM", 248: "ECU", 253: "SLV", 258: "GTM",
    263: "HTI", 268: "HND", 273: "MEX", 278: "NIC", 283: "PAN",
    288: "PRY", 293: "PER", 298: "URY", 299: "VEN",
    311: "ATG", 313: "BHS", 316: "BRB", 321: "DMA", 328: "GRD",
    336: "GUY", 339: "BLZ", 343: "JAM", 366: "TTO",
    419: "BHR", 423: "CYP", 429: "IRN", 433: "IRQ", 436: "ISR",
    439: "JOR", 443: "KWT", 446: "LBN", 449: "OMN", 453: "QAT",
    456: "SAU", 463: "SYR", 466: "ARE", 469: "EGY",
    474: "YEM", 512: "AFG", 513: "BGD", 514: "BTN", 516: "MMR",
    518: "LKA", 522: "HKG", 524: "IND", 532: "IDN", 534: "KOR",
    536: "LAO", 548: "MYS", 556: "MDV", 558: "MNG", 564: "NPL",
    566: "PAK", 576: "SGP", 578: "THA", 582: "VNM",
    536: "LAO", 542: "TWN", 546: "MAC",
    548: "MYS", 556: "MDV", 564: "NPL", 566: "PAK",
    576: "SGP", 578: "THA", 582: "VNM",
    612: "DZA", 616: "BWA", 618: "BDI", 622: "CMR", 624: "CPV",
    626: "CAF", 628: "TCD", 632: "COG", 634: "BEN", 636: "GNQ",
    638: "ETH", 642: "GAB", 644: "GMB", 646: "GHA", 648: "GIN",
    652: "CIV", 654: "KEN", 656: "LSO", 664: "MDG", 666: "MWI",
    668: "MLI", 672: "MUS", 674: "MAR", 676: "MOZ", 678: "NER",
    682: "NGA", 684: "ZWE", 686: "RWA", 688: "SEN", 694: "SLE",
    698: "SOM", 714: "SWZ", 716: "TZA", 718: "TGO", 722: "TUN",
    724: "UGA", 726: "BFA", 728: "COD", 734: "ZMB",
    738: "LBR", 742: "ERI", 744: "NAM", 746: "MRT",
    748: "AGO", 754: "DJI",
    911: "ARM", 912: "AZE", 913: "BLR", 914: "ALB", 915: "GEO",
    916: "KAZ", 917: "KGZ", 918: "BGR", 921: "MDA", 922: "RUS",
    923: "TKM", 924: "UKR", 925: "EST", 926: "LVA", 927: "LTU",
    935: "CZE", 936: "SVK", 939: "SVN", 941: "HRV", 943: "BIH",
    944: "HUN", 946: "MKD", 948: "MNE", 960: "ROU", 962: "POL",
    964: "SRB", 968: "CHN", 532: "IDN",
    # Some codes that may be in data but not mapped above
    111: "USA", 163: "EMU",  # Euro area aggregate
    135: "SMR",  # San Marino
}


# ── Country name → ISO3 mapping for IRR data ─────────────────────────
COUNTRY_TO_ISO3 = {
    'Afghanistan': 'AFG', 'Albania': 'ALB', 'Algeria': 'DZA', 'Angola': 'AGO',
    'Argentina': 'ARG', 'Armenia': 'ARM', 'Australia': 'AUS', 'Austria': 'AUT',
    'Azerbaijan': 'AZE', 'Bahamas': 'BHS', 'Bangladesh': 'BGD', 'Barbados': 'BRB',
    'Belarus': 'BLR', 'Belgium': 'BEL', 'Belize': 'BLZ', 'Benin': 'BEN',
    'Bhutan': 'BTN', 'Bolivia': 'BOL', 'Bosnia': 'BIH', 'Botswana': 'BWA',
    'Brazil': 'BRA', 'Brunei': 'BRN', 'Bulgaria': 'BGR', 'Burkina': 'BFA',
    'Burundi': 'BDI', 'Cambodia': 'KHM', 'Cameroon': 'CMR', 'Canada': 'CAN',
    'Cape Verde': 'CPV', 'Central African': 'CAF', 'Chad': 'TCD', 'Chile': 'CHL',
    'China': 'CHN', 'Colombia': 'COL', 'Comoros': 'COM', 'Congo': 'COG',
    'Costa Rica': 'CRI', "Cote d'Ivoire": 'CIV', 'Croatia': 'HRV', 'Cuba': 'CUB',
    'Cyprus': 'CYP', 'Czech': 'CZE', 'Denmark': 'DNK', 'Djibouti': 'DJI',
    'Dominican': 'DOM', 'Ecuador': 'ECU', 'Egypt': 'EGY', 'El Salvador': 'SLV',
    'Equatorial Guinea': 'GNQ', 'Eritrea': 'ERI', 'Estonia': 'EST', 'Ethiopia': 'ETH',
    'Fiji': 'FJI', 'Finland': 'FIN', 'France': 'FRA', 'Gabon': 'GAB',
    'Gambia': 'GMB', 'Georgia': 'GEO', 'Germany': 'DEU', 'Ghana': 'GHA',
    'Greece': 'GRC', 'Guatemala': 'GTM', 'Guinea': 'GIN', 'Guyana': 'GUY',
    'Haiti': 'HTI', 'Honduras': 'HND', 'Hong Kong': 'HKG', 'Hungary': 'HUN',
    'Iceland': 'ISL', 'India': 'IND', 'Indonesia': 'IDN', 'Iran': 'IRN',
    'Iraq': 'IRQ', 'Ireland': 'IRL', 'Israel': 'ISR', 'Italy': 'ITA',
    'Jamaica': 'JAM', 'Japan': 'JPN', 'Jordan': 'JOR', 'Kazakhstan': 'KAZ',
    'Kenya': 'KEN', 'Korea': 'KOR', 'Kuwait': 'KWT', 'Kyrgyz': 'KGZ',
    'Lao': 'LAO', 'Latvia': 'LVA', 'Lebanon': 'LBN', 'Lesotho': 'LSO',
    'Liberia': 'LBR', 'Libya': 'LBY', 'Lithuania': 'LTU', 'Luxembourg': 'LUX',
    'Macedonia': 'MKD', 'Madagascar': 'MDG', 'Malawi': 'MWI', 'Malaysia': 'MYS',
    'Maldives': 'MDV', 'Mali': 'MLI', 'Malta': 'MLT', 'Mauritania': 'MRT',
    'Mauritius': 'MUS', 'Mexico': 'MEX', 'Moldova': 'MDA', 'Mongolia': 'MNG',
    'Montenegro': 'MNE', 'Morocco': 'MAR', 'Mozambique': 'MOZ', 'Myanmar': 'MMR',
    'Namibia': 'NAM', 'Nepal': 'NPL', 'Netherlands': 'NLD', 'New Zealand': 'NZL',
    'Nicaragua': 'NIC', 'Niger': 'NER', 'Nigeria': 'NGA', 'Norway': 'NOR',
    'Oman': 'OMN', 'Pakistan': 'PAK', 'Panama': 'PAN', 'Papua New Guinea': 'PNG',
    'Paraguay': 'PRY', 'Peru': 'PER', 'Philippines': 'PHL', 'Poland': 'POL',
    'Portugal': 'PRT', 'Qatar': 'QAT', 'Romania': 'ROU', 'Russia': 'RUS',
    'Rwanda': 'RWA', 'Saudi Arabia': 'SAU', 'Senegal': 'SEN', 'Serbia': 'SRB',
    'Sierra Leone': 'SLE', 'Singapore': 'SGP', 'Slovak': 'SVK', 'Slovenia': 'SVN',
    'Somalia': 'SOM', 'South Africa': 'ZAF', 'Spain': 'ESP', 'Sri Lanka': 'LKA',
    'Sudan': 'SDN', 'Suriname': 'SUR', 'Swaziland': 'SWZ', 'Sweden': 'SWE',
    'Switzerland': 'CHE', 'Syria': 'SYR', 'Taiwan': 'TWN', 'Tajikistan': 'TJK',
    'Tanzania': 'TZA', 'Thailand': 'THA', 'Togo': 'TGO', 'Trinidad': 'TTO',
    'Tunisia': 'TUN', 'Turkey': 'TUR', 'Turkmenistan': 'TKM', 'Uganda': 'UGA',
    'Ukraine': 'UKR', 'United Arab': 'ARE', 'United Kingdom': 'GBR',
    'United States': 'USA', 'Uruguay': 'URY', 'Uzbekistan': 'UZB',
    'Venezuela': 'VEN', 'Vietnam': 'VNM', 'Yemen': 'YEM', 'Zambia': 'ZMB',
    'Zimbabwe': 'ZWE', 'Dem. Rep.': 'COD', 'Ivory Coast': 'CIV',
}


# ── Parse Trilemma Indices ────────────────────────────────────────────

def load_trilemma_indices():
    """Load and parse downloaded trilemma indices Excel."""
    path = RAW_DIR / "trilemma_indexes.xlsx"
    if not path.exists():
        print("  ERROR: trilemma_indexes.xlsx not found in data/raw/")
        return None

    print("  Loading trilemma indices from Excel ...")
    df = pd.read_excel(path)
    print(f"    Shape: {df.shape}")
    print(f"    Columns: {list(df.columns)}")

    # Rename columns
    df = df.rename(columns={
        'IMF-World Bank Country Code': 'imf_code',
        'year': 'year',
        'Exchange Rate Stability Index': 'ers_index',
        'Monetary Independence Index': 'mi_index',
        'Financial Openness Index': 'fo_index',
        'Country Name': 'country_name',
    })

    # Map IMF code → ISO3
    df['iso3'] = df['imf_code'].map(IMF_TO_ISO3)

    # For unmapped codes, try country name
    unmapped = df[df['iso3'].isna()]['country_name'].unique()
    if len(unmapped) > 0:
        print(f"    Unmapped countries ({len(unmapped)}): {list(unmapped[:10])}...")
        for name in unmapped:
            for key, iso3 in COUNTRY_TO_ISO3.items():
                if key.lower() in str(name).lower():
                    df.loc[df['country_name'] == name, 'iso3'] = iso3
                    break

    # Drop aggregates (Euro Area etc.)
    df = df[df['iso3'].notna() & (df['iso3'] != 'EMU')].copy()

    print(f"    Parsed: {len(df)} obs, {df['iso3'].nunique()} countries")
    print(f"    Year range: {df['year'].min()}-{df['year'].max()}")
    for col in ['mi_index', 'ers_index', 'fo_index']:
        s = df[col].dropna()
        print(f"    {col}: N={len(s)}, mean={s.mean():.3f}, std={s.std():.3f}")

    return df[['iso3', 'year', 'mi_index', 'ers_index', 'fo_index']].copy()


# ── Parse IRR Regime Data ─────────────────────────────────────────────

def load_irr_regimes():
    """Load and parse IRR regime Excel (wide format: rows=years, cols=countries)."""
    path = RAW_DIR / "irr_regimes.xlsx"
    if not path.exists():
        print("  WARNING: irr_regimes.xlsx not found. Will derive regimes from ERS index.")
        return None

    print("  Loading IRR regime classification ...")

    # Read both Coarse and Fine sheets
    results = {}
    for sheet, col_name in [('Coarse', 'regime_coarse'), ('Fine', 'regime_fine')]:
        try:
            raw = pd.read_excel(path, sheet_name=sheet)
        except Exception as e:
            print(f"    Sheet {sheet} failed: {e}")
            continue

        # Row 4 has country names, row 6+ has yearly data
        # Column 1 ("Independence") has years starting at row 6
        country_names = raw.iloc[4, 2:].values  # skip first 2 cols
        years = raw.iloc[6:, 1].values  # "Independence" column has years

        # Extract regime matrix
        regime_matrix = raw.iloc[6:, 2:].values

        rows = []
        for i, yr in enumerate(years):
            yr_num = pd.to_numeric(yr, errors='coerce')
            if pd.isna(yr_num):
                continue
            for j, cname in enumerate(country_names):
                if pd.isna(cname) or str(cname).strip() == '':
                    continue
                val = pd.to_numeric(regime_matrix[i, j], errors='coerce')
                if pd.isna(val):
                    continue
                rows.append({
                    'country_name': str(cname).strip(),
                    'year': int(yr_num),
                    col_name: val
                })

        df = pd.DataFrame(rows)
        print(f"    {sheet}: {len(df)} obs")
        results[col_name] = df

    # Merge fine and coarse
    if 'regime_coarse' in results and 'regime_fine' in results:
        merged = results['regime_coarse'].merge(
            results['regime_fine'], on=['country_name', 'year'], how='outer'
        )
    elif 'regime_coarse' in results:
        merged = results['regime_coarse']
    elif 'regime_fine' in results:
        merged = results['regime_fine']
    else:
        return None

    # Map country names to ISO3
    merged['iso3'] = np.nan
    for key, iso3 in COUNTRY_TO_ISO3.items():
        mask = merged['country_name'].str.contains(key, case=False, na=False)
        merged.loc[mask & merged['iso3'].isna(), 'iso3'] = iso3

    mapped = merged['iso3'].notna().sum()
    total = len(merged)
    print(f"    Country mapping: {mapped}/{total} obs mapped to ISO3")

    unmapped_names = merged[merged['iso3'].isna()]['country_name'].unique()
    if len(unmapped_names) > 0:
        print(f"    Unmapped ({len(unmapped_names)}): {list(unmapped_names[:15])}")

    merged = merged[merged['iso3'].notna()].copy()

    # IRR coarse: 1=peg, 2=crawling peg, 3=managed float, 4=free float, 5=free falling, 6=dual
    # Collapse to 3 categories: 1-2 = peg (1), 3 = intermediate (2), 4-6 = float (3)
    if 'regime_coarse' in merged.columns:
        merged['regime_3cat'] = merged['regime_coarse'].map({
            1: 1, 2: 1, 3: 2, 4: 3, 5: 3, 6: 3
        })
        print(f"    3-category regime distribution:")
        print(f"      {merged['regime_3cat'].value_counts().sort_index().to_dict()}")

    keep = ['iso3', 'year']
    if 'regime_coarse' in merged.columns:
        keep += ['regime_coarse', 'regime_3cat']
    if 'regime_fine' in merged.columns:
        keep.append('regime_fine')

    result = merged[keep].copy()
    print(f"    Final IRR: {len(result)} obs, {result['iso3'].nunique()} countries")
    return result


# ── Merge and Construct Variables ─────────────────────────────────────

def merge_panel(trilemma, irr):
    """Merge trilemma indices + IRR regimes with full_panel.csv."""
    print("\n  Loading full_panel.csv ...")
    fp = pd.read_csv(FULL_PANEL)
    fp = fp[fp['year'] <= 2024].copy()
    print(f"    Full panel: {len(fp)} obs, {fp['iso3'].nunique()} countries")

    # Merge trilemma indices
    if trilemma is not None:
        print("  Merging trilemma indices ...")
        panel = fp.merge(trilemma, on=['iso3', 'year'], how='left')
        n_tri = panel['mi_index'].notna().sum()
        print(f"    Trilemma coverage: {n_tri} obs "
              f"({n_tri/len(panel)*100:.1f}%)")
    else:
        panel = fp.copy()

    # Merge IRR regimes
    if irr is not None:
        print("  Merging IRR regimes ...")
        panel = panel.merge(irr, on=['iso3', 'year'], how='left')
        n_irr = panel['regime_coarse'].notna().sum() if 'regime_coarse' in panel.columns else 0
        print(f"    IRR coverage: {n_irr} obs ({n_irr/len(panel)*100:.1f}%)")

    # If no regime_3cat from IRR, derive from ers_index
    if 'regime_3cat' not in panel.columns or panel['regime_3cat'].notna().sum() == 0:
        if 'ers_index' in panel.columns:
            print("  Deriving regime_3cat from ers_index ...")
            panel['regime_3cat'] = pd.cut(
                panel['ers_index'],
                bins=[-np.inf, 0.33, 0.67, np.inf],
                labels=[3, 2, 1]  # low ERS = float, high ERS = peg
            ).astype(float)

    # If no regime_coarse from IRR, use regime_3cat as fallback
    if 'regime_coarse' not in panel.columns:
        panel['regime_coarse'] = panel.get('regime_3cat', np.nan)

    # ── Derived variables ──────────────────────────────────────────────

    # mi_sacrifice: binary — country sacrifices MI (mi_index below sample median)
    if 'mi_index' in panel.columns:
        mi_med = panel['mi_index'].median()
        panel['mi_sacrifice'] = (panel['mi_index'] < mi_med).astype(float)
        panel.loc[panel['mi_index'].isna(), 'mi_sacrifice'] = np.nan
        n_sac = panel['mi_sacrifice'].sum()
        print(f"  MI sacrifice: {n_sac:.0f} obs (median MI={mi_med:.3f})")

    # Trilemma corner: which dimension is sacrificed most?
    if all(c in panel.columns for c in ['mi_index', 'ers_index', 'fo_index']):
        tri_cols = panel[['mi_index', 'ers_index', 'fo_index']]
        min_idx = tri_cols.idxmin(axis=1)
        panel['trilemma_corner'] = min_idx.map({
            'mi_index': 1,   # sacrifice MI → peg + open
            'ers_index': 2,  # sacrifice ERS → float + open
            'fo_index': 3,   # sacrifice FO → closed + stable
        })
        corners = panel['trilemma_corner'].value_counts().sort_index()
        print(f"  Trilemma corners: {dict(corners)}")
        print(f"    1=peg+open, 2=float+open, 3=closed+stable")

        # Trilemma sum
        panel['trilemma_sum'] = panel['mi_index'] + panel['ers_index'] + panel['fo_index']
        print(f"  Trilemma sum: mean={panel['trilemma_sum'].mean():.3f}, "
              f"std={panel['trilemma_sum'].std():.3f}")

    # Binary regime indicators
    if 'regime_3cat' in panel.columns:
        panel['is_peg'] = (panel['regime_3cat'] == 1).astype(float)
        panel.loc[panel['regime_3cat'].isna(), 'is_peg'] = np.nan
        panel['is_float'] = (panel['regime_3cat'] == 3).astype(float)
        panel.loc[panel['regime_3cat'].isna(), 'is_float'] = np.nan
        print(f"  Peg share: {panel['is_peg'].mean():.3f}")
        print(f"  Float share: {panel['is_float'].mean():.3f}")

    # Regime transitions
    panel = panel.sort_values(['iso3', 'year'])
    if 'regime_3cat' in panel.columns:
        panel['regime_change'] = panel.groupby('iso3')['regime_3cat'].diff().abs()
        panel['regime_change'] = (panel['regime_change'] > 0).astype(float)
        panel.loc[panel['regime_3cat'].isna(), 'regime_change'] = np.nan
        print(f"  Regime transitions: {panel['regime_change'].sum():.0f}")

    # Eurozone membership
    EUROZONE = {
        'AUT': 1999, 'BEL': 1999, 'FIN': 1999, 'FRA': 1999, 'DEU': 1999,
        'IRL': 1999, 'ITA': 1999, 'LUX': 1999, 'NLD': 1999, 'PRT': 1999,
        'ESP': 1999, 'GRC': 2001, 'SVN': 2007, 'CYP': 2008, 'MLT': 2008,
        'SVK': 2009, 'EST': 2011, 'LVA': 2014, 'LTU': 2015,
    }
    panel['eurozone'] = panel.apply(
        lambda r: 1 if r['iso3'] in EUROZONE and r['year'] >= EUROZONE.get(r['iso3'], 9999) else 0,
        axis=1
    )
    print(f"  Eurozone obs: {panel['eurozone'].sum()}")

    # OECD floaters (non-eurozone OECD with floating rates) — for phase 5 comparisons
    OECD = {
        'AUS', 'AUT', 'BEL', 'CAN', 'CHL', 'COL', 'CRI', 'CZE', 'DNK', 'EST',
        'FIN', 'FRA', 'DEU', 'GRC', 'HUN', 'ISL', 'IRL', 'ISR', 'ITA', 'JPN',
        'KOR', 'LVA', 'LTU', 'LUX', 'MEX', 'NLD', 'NZL', 'NOR', 'POL', 'PRT',
        'SVK', 'SVN', 'ESP', 'SWE', 'CHE', 'TUR', 'GBR', 'USA',
    }
    panel['is_oecd'] = panel['iso3'].isin(OECD).astype(int)
    panel['oecd_floater'] = ((panel['is_oecd'] == 1) & (panel['eurozone'] == 0)).astype(int)

    print(f"\n  Final panel: {len(panel)} obs, {panel['iso3'].nunique()} countries")
    print(f"  Year range: {panel['year'].min()}-{panel['year'].max()}")

    return panel


# ── Summary Statistics ────────────────────────────────────────────────

def write_summary_stats(df):
    """Write summary statistics table."""
    print("\n  Writing summary statistics ...")

    vars_of_interest = [
        ('mi_index', 'Monetary Independence'),
        ('ers_index', 'Exchange Rate Stability'),
        ('fo_index', 'Financial Openness'),
        ('trilemma_sum', 'Trilemma Sum'),
        ('mi_sacrifice', 'MI Sacrifice (binary)'),
        ('regime_coarse', 'IRR Coarse Regime (1-6)'),
        ('regime_3cat', 'Regime 3-cat (1=peg,2=int,3=float)'),
        ('is_peg', 'Peg (binary)'),
        ('is_float', 'Float (binary)'),
        ('regime_change', 'Regime Transition'),
        ('eurozone', 'Eurozone Member'),
        ('ca_gdp', 'Current Account/GDP'),
        ('Z_1', 'Z₁ (Demographics)'),
        ('Z_2', 'Z₂'),
        ('Z_3', 'Z₃'),
        ('kaopen', 'KAOPEN'),
        ('old_dep', 'Old-age Dependency'),
        ('youth_dep', 'Youth Dependency'),
        ('rgdp_growth', 'Real GDP Growth'),
        ('nfa_gdp', 'NFA/GDP'),
    ]

    lines = ["# Summary Statistics: Trilemma Panel\n"]
    lines.append("| Variable | N | Mean | Std | Min | Max |")
    lines.append("|:---|---:|---:|---:|---:|---:|")

    for var, label in vars_of_interest:
        if var in df.columns:
            s = df[var].dropna()
            if len(s) > 0:
                lines.append(f"| {label} | {len(s)} | {s.mean():.3f} | "
                            f"{s.std():.3f} | {s.min():.3f} | {s.max():.3f} |")

    lines.append(f"\n*Panel: {df['iso3'].nunique()} countries, "
                 f"{df['year'].min()}-{df['year'].max()}*")

    # Trilemma corner distribution
    if 'trilemma_corner' in df.columns:
        lines.append("\n\n# Trilemma Corner Distribution\n")
        lines.append("| Corner | Label | N | Share |")
        lines.append("|:---|:---|---:|---:|")
        corner_labels = {1: 'Peg + Open (sacrifice MI)',
                         2: 'Float + Open (sacrifice ERS)',
                         3: 'Closed + Stable (sacrifice FO)'}
        total = df['trilemma_corner'].notna().sum()
        for corner, label in corner_labels.items():
            n = (df['trilemma_corner'] == corner).sum()
            share = n / total if total > 0 else 0
            lines.append(f"| {corner} | {label} | {n} | {share:.3f} |")

    # IRR regime distribution
    if 'regime_coarse' in df.columns:
        lines.append("\n\n# IRR Coarse Regime Distribution\n")
        lines.append("| Code | Label | N | Share |")
        lines.append("|:---|:---|---:|---:|")
        irr_labels = {1: 'No separate legal tender / peg',
                      2: 'Crawling peg / narrow band',
                      3: 'Managed floating',
                      4: 'Freely floating',
                      5: 'Freely falling',
                      6: 'Dual market'}
        total_r = df['regime_coarse'].notna().sum()
        for code, label in irr_labels.items():
            n = (df['regime_coarse'] == code).sum()
            share = n / total_r if total_r > 0 else 0
            lines.append(f"| {code} | {label} | {n} | {share:.3f} |")

    path = TABLES_DIR / "summary_statistics.md"
    path.write_text('\n'.join(lines))
    print(f"    Saved: {path}")


# ── Main ──────────────────────────────────────────────────────────────

def main():
    print("=" * 70)
    print("PHASE 1: DATA ASSEMBLY — TRILEMMA PANEL")
    print("=" * 70)

    # 1. Load trilemma indices
    trilemma = load_trilemma_indices()

    # 2. Load IRR regimes
    irr = load_irr_regimes()

    # 3. Merge everything
    panel = merge_panel(trilemma, irr)

    # 4. Save
    out_path = PROCESSED_DIR / "trilemma_panel.csv"
    panel.to_csv(out_path, index=False)
    print(f"\n  Saved: {out_path}")
    print(f"  Size: {out_path.stat().st_size / 1024:.0f} KB")

    # 5. Summary statistics
    write_summary_stats(panel)

    print("\n" + "=" * 70)
    print("PHASE 1 COMPLETE")
    print("=" * 70)


if __name__ == '__main__':
    main()
